rm(list = ls())
require(readstata13)
require(haven)
require(bizdays)
require(splitstackshape)
require(dplyr)
require(foreign)
require(readstata13)
require(readxl)

input_gdrive = "C:\\Users\\aaron.phipps\\OneDrive - West Point\\Projects\\Live Projects\\No threat time\\build\\input\\"
output_dir = "C:\\Users\\aaron.phipps\\OneDrive - West Point\\Projects\\Live Projects\\No threat time\\build\\temp"
susp_roster = read.dta13("C:\\Users\\aaron.phipps\\OneDrive - West Point\\Projects\\Live Projects\\No threat time\\build\\temp\\suspension_roster_daybyday.dta", nonint.factors = TRUE)
teacher_panel = read.dta13("C:\\Users\\aaron.phipps\\OneDrive - West Point\\Projects\\Live Projects\\No threat time\\build\\temp\\teacher_roster_for_susp.dta", nonint.factors = TRUE)

create_susp_data = function(this_year, susp_roster, teacher_panel, keep_columns){

  admin_buffer = 0
  me_buffer = 0
  
  susp_roster = susp_roster[which(susp_roster$year == this_year),]
  teacher_panel = teacher_panel[which(teacher_panel$year == this_year),]
  
  
  # import calendar and create variables
    academic_calendar = readxl::read_xlsx(path=paste(input_gdrive,"calendar_", this_year,".xlsx",sep=''),sheet = "Sheet1")
    
    holidays = as.Date(academic_calendar$holidays[!is.na(academic_calendar$holidays)])
    important_dates = academic_calendar[!is.na(academic_calendar$important),c("name","important")]
    important_dates$important = as.Date(important_dates$important)
    
    
    cal = create.calendar(name="evaldays", holidays=holidays, weekdays = c("saturday","sunday"), 
                          start.date = important_dates$important[which(important_dates$name == "First Day")],
                          end.date = important_dates$important[which(important_dates$name == "Last Day")])
    
    cutoff_m1 = important_dates$important[which(important_dates$name == "Master Educator Cutoff 1")]+me_buffer
    cutoff_p1 = important_dates$important[which(important_dates$name == "Administrative Cutoff 1")]+admin_buffer
    cutoff_p2 = important_dates$important[which(important_dates$name == "Administrative Cutoff 2")]+admin_buffer
    eval_end_date = important_dates$important[which(important_dates$name == "Evaluation End Date")]
    dccas_end_date = important_dates$important[which(important_dates$name == "DCCAS Testing End")]
    dccas_start_date = important_dates$important[which(important_dates$name == "DCCAS Testing Start")]
    eval_start = important_dates$important[which(important_dates$name == "Evaluation Start Date")]
    
    end_winter_break = academic_calendar$holidays[which(academic_calendar$name == "Winter Break End")]
    
    bizdays(important_dates$important[which(important_dates$name == "Evaluation Start Date")], dccas_start_date, cal)
    bizdays(cutoff_m1, dccas_end_date, cal)
    
    
    start_date = cutoff_m1
    end_date = cutoff_p2
  
  # Create dataset framework  
    susp.by.date = data.frame(dates=as.Date(character()), year=integer(), month=integer(), is_bizday=integer(),
                              weekday=integer(), id=integer(), susp=integer(), nothreat=integer(), postconf_p2=integer(), postconf_m2=integer(), sch_id = integer())
  
  

  # cycle through each teacher
  
  #
  for(j in 1:length(unique(teacher_panel$id))){
    t = unique(teacher_panel$id)[j]
    
    # select teacher's row from teacher panel
    tpanel.tmp = teacher_panel[which(teacher_panel$id == t),]
    sch_id = tpanel.tmp$sch_id[1]
    
    # create calendar dataset
    tmp.cal = create.calendar(name="evaldays", holidays=holidays, weekdays = c("saturday","sunday"), 
                              start.date = start_date, end.date = end_date)
    nrows = nrow(tmp.cal$dates.table[which(tmp.cal$dates.table[,4]==1),])
    
    # create final rows for the teacher that will be added to hte dataset
    t.table = cbind(as.data.frame(tmp.cal$dates.table[which(tmp.cal$dates.table[,4]==1),]),
                    data.frame(list(id=rep(t,nrows),sch_id=rep(sch_id,nrows), susp=rep(NA,nrows),
                                    nothreat=rep(NA,nrows),postconf_p2=rep(NA,nrows),postconf_m2=rep(NA,nrows))))
    t.table$dates = as.Date(t.table$dates, origin = "1970-01-01")
    
    # find suspension records for this teacher
      # look only at short-term suspensions (less than 10 days)
        susp.tmp = susp_roster[intersect(intersect(which(susp_roster$id == t),which(susp_roster$suspensiondays<10)),which(susp_roster$year_dose >= 0.25)),]
      
      # grab suspensions within dates of interest
        t2 = susp.tmp[which(susp.tmp$date %in% t.table$dates),]
    
      # sum up suspensions by date
        t3 = t2 %>% group_by(date) %>% summarize(count = n())
    
      # now fill in all the suspension dates (if there are any)
        if(nrow(t3) > 0){
          for(m in 1:nrow(t3)){
            myrow = which(t.table$dates == t3$date[m])
            t.table$susp[myrow] = t3$count[m]
          }
        }
        
    t.table$susp[is.na(t.table$susp)] = 0
    
    if(length(tpanel.tmp$obsdate_2p) > 0 && length(tpanel.tmp$obsdate_2m) > 0 && length(tpanel.tmp$confdate_2p)>0 && length(tpanel.tmp$confdate_2m) > 0){
      if(!is.na(tpanel.tmp$obsdate_2p) && !is.na(tpanel.tmp$obsdate_2m) && !is.na(tpanel.tmp$confdate_2p) && !is.na(tpanel.tmp$confdate_2m)){
        last.eval = max(tpanel.tmp$obsdate_2p,tpanel.tmp$obsdate_2m)
        confdate_2p = tpanel.tmp$confdate_2p[1]
        confdate_2m = tpanel.tmp$confdate_2m[1]
        if (last.eval < end_date){
          t.table$nothreat[which(t.table$dates > last.eval)] = 1
        }
        if (confdate_2p < end_date){
          t.table$postconf_p2[which(t.table$dates > confdate_2p)] = 1
        }
        if (confdate_2m < end_date){
          t.table$postconf_m2[which(t.table$dates > confdate_2m)] = 1
        }
        t.table$nothreat[is.na(t.table$nothreat)] = 0
        t.table$postconf_p2[is.na(t.table$postconf_p2)] = 0
        t.table$postconf_m2[is.na(t.table$postconf_m2)] = 0
      }
    }
    for(col in keep_columns){
      keep_val = tpanel.tmp[1,col]
     t.table[[col]] = rep(keep_val,nrows)
    }
    
    susp.by.date = rbind(susp.by.date,t.table)
  }
  
  write_dta(susp.by.date, paste(output_dir,"\\susp_by_date_", this_year, ".dta",sep=''))
  
}

keep_columns = c("ivamath","ivaread","subject","grade", "experience")

for(this_year in c(2011,2012))
{
  create_susp_data(this_year = this_year, susp_roster = susp_roster, teacher_panel = teacher_panel, keep_columns = keep_columns)
}



